{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from os import listdir\n",
    "from keras.models import model_from_json\n",
    "from keras.preprocessing.text import Tokenizer\n",
    "from keras.preprocessing.sequence import pad_sequences\n",
    "from nltk.translate.bleu_score import sentence_bleu\n",
    "from tqdm import tqdm\n",
    "import numpy as np\n",
    "import h5py as h5py\n",
    "from compiler.classes.Compiler import *"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Read a file and return a string\n",
    "def load_doc(filename):\n",
    "    file = open(filename, 'r')\n",
    "    text = file.read()\n",
    "    file.close()\n",
    "    return text\n",
    "\n",
    "def load_data(data_dir):\n",
    "    text = []\n",
    "    images = []\n",
    "    # Load all the files and order them\n",
    "    all_filenames = listdir(data_dir)\n",
    "    all_filenames.sort()\n",
    "    for filename in (all_filenames)[-2:]:\n",
    "        if filename[-3:] == \"npz\":\n",
    "            # Load the images already prepared in arrays\n",
    "            image = np.load(data_dir+filename)\n",
    "            images.append(image['features'])\n",
    "        else:\n",
    "            # Load the boostrap tokens and rap them in a start and end tag\n",
    "            syntax = '<START> ' + load_doc(data_dir+filename) + ' <END>'\n",
    "            # Seperate all the words with a single space\n",
    "            syntax = ' '.join(syntax.split())\n",
    "            # Add a space after each comma\n",
    "            syntax = syntax.replace(',', ' ,')\n",
    "            text.append(syntax)\n",
    "    images = np.array(images, dtype=float)\n",
    "    return images, text"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Initialize the function to create the vocabulary \n",
    "tokenizer = Tokenizer(filters='', split=\" \", lower=False)\n",
    "# Create the vocabulary \n",
    "tokenizer.fit_on_texts([load_doc('resources/bootstrap.vocab')])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "dir_name = '../../../../eval/'\n",
    "train_features, texts = load_data(dir_name)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#load model and weights \n",
    "json_file = open('../../../../model.json', 'r')\n",
    "loaded_model_json = json_file.read()\n",
    "json_file.close()\n",
    "loaded_model = model_from_json(loaded_model_json)\n",
    "# load weights into new model\n",
    "loaded_model.load_weights(\"../../../../weights.hdf5\")\n",
    "print(\"Loaded model from disk\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# map an integer to a word\n",
    "def word_for_id(integer, tokenizer):\n",
    "    for word, index in tokenizer.word_index.items():\n",
    "        if index == integer:\n",
    "            return word\n",
    "    return None\n",
    "print(word_for_id(17, tokenizer))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# generate a description for an image\n",
    "def generate_desc(model, tokenizer, photo, max_length):\n",
    "    photo = np.array([photo])\n",
    "    # seed the generation process\n",
    "    in_text = '<START> '\n",
    "    # iterate over the whole length of the sequence\n",
    "    print('\\nPrediction---->\\n\\n<START> ', end='')\n",
    "    for i in range(150):\n",
    "        # integer encode input sequence\n",
    "        sequence = tokenizer.texts_to_sequences([in_text])[0]\n",
    "        # pad input\n",
    "        sequence = pad_sequences([sequence], maxlen=max_length)\n",
    "        # predict next word\n",
    "        yhat = loaded_model.predict([photo, sequence], verbose=0)\n",
    "        # convert probability to integer\n",
    "        yhat = np.argmax(yhat)\n",
    "        # map integer to word\n",
    "        word = word_for_id(yhat, tokenizer)\n",
    "        # stop if we cannot map the word\n",
    "        if word is None:\n",
    "            break\n",
    "        # append as input for generating the next word\n",
    "        in_text += word + ' '\n",
    "        # stop if we predict the end of the sequence\n",
    "        print(word + ' ', end='')\n",
    "        if word == '<END>':\n",
    "            break\n",
    "    return in_text"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "max_length = 48 \n",
    "# evaluate the skill of the model\n",
    "def evaluate_model(model, descriptions, photos, tokenizer, max_length):\n",
    "    actual, predicted = list(), list()\n",
    "    # step over the whole set\n",
    "    for i in range(len(texts)):\n",
    "        yhat = generate_desc(model, tokenizer, photos[i], max_length)\n",
    "        # store actual and predicted\n",
    "        print('\\n\\nReal---->\\n\\n' + texts[i])\n",
    "        actual.append([texts[i].split()])\n",
    "        predicted.append(yhat.split())\n",
    "    # calculate BLEU score\n",
    "    bleu = corpus_bleu(actual, predicted)\n",
    "    return bleu, actual, predicted\n",
    "\n",
    "bleu, actual, predicted = evaluate_model(loaded_model, texts, train_features, tokenizer, max_length)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#Compile the tokens into HTML and css\n",
    "dsl_path = \"compiler/assets/web-dsl-mapping.json\"\n",
    "compiler = Compiler(dsl_path)\n",
    "compiled_website = compiler.compile(predicted[0], 'index.html')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "print(compiled_website )"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "print(bleu)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.4.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
